{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "1e39a03a-a70e-4fdf-a630-92252c304d31",
   "metadata": {},
   "source": [
    "# Parse \"Table of Nuclear Electric Quadrupole Moments\"\n",
    "\n",
    "source: https://www-nds.iaea.org/publications/indc/indc-nds-0833/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c7973671-96f6-45ec-b10c-2d03b8e37453",
   "metadata": {},
   "outputs": [],
   "source": [
    "from fractions import Fraction\n",
    "from pathlib import Path\n",
    "import re\n",
    "\n",
    "import camelot\n",
    "from camelot.io import read_pdf\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import polars as pl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b0ada16e-5b13-44ef-91cb-96ca935739f7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# show up to 100 rows when polars renders a table in cell output\n",
    "pl.Config.set_tbl_rows(100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "33e431f2-3553-4b35-9915-61d6a3b82d7d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# IAEA report INDC(NDS)-0833; the table is read from pages 19-66\n",
    "pdf = Path(\"../data/indc-nds-0833.pdf\")\n",
    "tabs = read_pdf(\n",
    "    str(pdf),\n",
    "    flavor=\"stream\",\n",
    "    pages=\"19-66\",\n",
    "    # page region to parse, followed by the x positions of the nine\n",
    "    # separators that split it into ten columns\n",
    "    table_areas=[\"50,545,701,50\"],\n",
    "    columns=[\"115,171,212,261,306,385,444,508,585\"],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "54e803e1-7854-4e6c-b367-4cb41a3ef9ed",
   "metadata": {},
   "outputs": [],
   "source": [
    "# stack the per-page tables: blank strings become missing values and rows\n",
    "# that are entirely empty are discarded\n",
    "df = pd.concat(\n",
    "    [page.df.replace(\"\", None).dropna(axis=\"index\", how=\"all\") for page in tabs]\n",
    ")\n",
    "# skip the first row of the stacked frame and label the ten columns\n",
    "df = df.iloc[1:].set_axis(\n",
    "    [\n",
    "        \"Element\",\n",
    "        \"Isotope\",\n",
    "        \"E(level)\",\n",
    "        \"T1/2\",\n",
    "        \"s/p\",\n",
    "        \"Q(b)\",\n",
    "        \"ref.std.\",\n",
    "        \"method\",\n",
    "        \"NSR Keynumber\",\n",
    "        \"Journal Reference\",\n",
    "    ],\n",
    "    axis=\"columns\",\n",
    ")\n",
    "# persist the raw extraction so the processing step can start from disk\n",
    "df.to_parquet(\"nuc-el-quad-mom-raw.parquet\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f0476f06-3422-4d41-8fbb-c3e4b659d031",
   "metadata": {},
   "source": [
    "## Process extracted table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "474aa3e4-1384-4149-b20f-2995b17f4cf0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# regex capturing atomic number, element symbol and mass number\n",
    "pattern = r\"(\\d+)\\s*([A-Za-z]+)\\s*(\\d+)\"\n",
    "\n",
    "qmom = (\n",
    "    pl.read_parquet(\"nuc-el-quad-mom-raw.parquet\")\n",
    "    .select([\"Isotope\", \"E(level)\", \"s/p\", \"Q(b)\"])\n",
    "    # keep only rows that report a Q(b) value for a ground state nucleus,\n",
    "    # i.e. where E(level) is the string \"0\"\n",
    "    .drop_nulls(subset=\"Q(b)\")\n",
    "    .filter(pl.col(\"E(level)\") == \"0\")\n",
    "    # split the isotope label into its three components\n",
    "    .with_columns(captures=pl.col(\"Isotope\").str.extract_groups(pattern))\n",
    "    .with_columns(\n",
    "        atomic_number=pl.col(\"captures\").struct.field(\"1\").cast(pl.Int64),\n",
    "        symbol=pl.col(\"captures\").struct.field(\"2\"),\n",
    "        mass_number=pl.col(\"captures\").struct.field(\"3\").cast(pl.Int64),\n",
    "    )\n",
    "    .drop(\"captures\")\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "37fa7b18-6a4a-492e-b444-fc021db547ca",
   "metadata": {},
   "outputs": [],
   "source": [
    "# split the \"s/p\" label into separate `spin` and `parity` columns; spin is\n",
    "# either a fraction (\"3/2\") or an integer, parity an optional trailing sign\n",
    "spin_re = r\"(?P<spin>\\d+/\\d+|\\d+)(?P<parity>[+-]?)\"\n",
    "qmom = qmom.with_columns(\n",
    "    pl.col(\"s/p\").str.extract_groups(spin_re).alias(\"captures\")\n",
    ").unnest(\"captures\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9bf442b6-cdc1-4926-abfb-2af07720b21a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse nuclear electric quadrupole moments and their uncertainties.\n",
    "# Entries look like \"+0.00286(2)\": an optional sign, the value, and the\n",
    "# uncertainty in units of the last quoted digit inside parentheses.\n",
    "val_unc_re = r\"(?P<value_sign>[+-])?(?P<value_lead>\\d+)\\.?(?P<value_decimals>\\d+)?\\s*\\(?(?P<value_unc>\\d+(?:\\.\\d+)?)\\)?\"\n",
    "qmom = (\n",
    "    qmom.with_columns(captures=pl.col(\"Q(b)\").str.extract_groups(val_unc_re))\n",
    "    .unnest(\"captures\")\n",
    "    .with_columns(\n",
    "        # number of decimal places in the value; a value quoted without a\n",
    "        # decimal part has precision 0 (not null), so that its uncertainty\n",
    "        # is scaled by 10**0 instead of being lost\n",
    "        value_precision=pl.col(\"value_decimals\")\n",
    "        .str.len_chars()\n",
    "        .fill_null(0)\n",
    "        .cast(pl.Int32),\n",
    "    )\n",
    "    .with_columns(\n",
    "        # reassemble sign, integer part and decimals into one number; Float64\n",
    "        # keeps the tabulated decimals exact enough to round-trip, whereas\n",
    "        # Float32 would store e.g. 0.00286 as 0.0028599999\n",
    "        quadrupole_moment=pl.concat_str(\n",
    "            pl.col(\"value_sign\"),\n",
    "            pl.col(\"value_lead\"),\n",
    "            pl.lit(\".\"),\n",
    "            pl.col(\"value_decimals\"),\n",
    "            ignore_nulls=True,\n",
    "        ).cast(pl.Float64),\n",
    "        # uncertainty = (digits in parentheses) * 10**(-decimal places)\n",
    "        quadrupole_moment_uncertainty=pl.lit(10.0, dtype=pl.Float64).pow(\n",
    "            -pl.col(\"value_precision\").cast(pl.Float64)\n",
    "        )\n",
    "        * pl.col(\"value_unc\").cast(pl.Float64),\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fe7762be-1a11-45b7-9b17-a92078a297c4",
   "metadata": {},
   "source": [
    "## Fetch isotope table for comparison"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "521e425a-b84a-4853-a108-869bac02d324",
   "metadata": {},
   "outputs": [],
   "source": [
    "from mendeleev.fetch import fetch_table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "87db5223-629c-489e-b663-aca80c7765a9",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# load the \"isotopes\" table from mendeleev (returned as a pandas frame) and\n",
    "# convert it to polars; the bare last expression displays it\n",
    "isotopes = pl.from_pandas(fetch_table(\"isotopes\"))\n",
    "isotopes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "05542fb1-b785-447a-9b05-01be6f20552a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# side-by-side view of the parsed values and the ones currently stored in\n",
    "# mendeleev (the latter carry the \"_true\" suffix after the join)\n",
    "qmom.select(\n",
    "    \"atomic_number\",\n",
    "    \"symbol\",\n",
    "    \"mass_number\",\n",
    "    \"spin\",\n",
    "    \"parity\",\n",
    "    \"quadrupole_moment\",\n",
    "    \"quadrupole_moment_uncertainty\",\n",
    ").join(\n",
    "    isotopes, how=\"left\", on=[\"atomic_number\", \"mass_number\"], suffix=\"_true\"\n",
    ").select(\n",
    "    \"atomic_number\",\n",
    "    \"symbol\",\n",
    "    \"mass_number\",\n",
    "    \"spin\",\n",
    "    \"spin_true\",\n",
    "    \"parity\",\n",
    "    \"parity_true\",\n",
    "    \"quadrupole_moment\",\n",
    "    \"quadrupole_moment_true\",\n",
    "    \"quadrupole_moment_uncertainty\",\n",
    "    \"quadrupole_moment_uncertainty_true\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e0943f87-bafe-4da3-a124-d4e1c67844d6",
   "metadata": {},
   "source": [
    "## Update values in the mendeleev db"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eaa4a511-da4b-466d-8064-2f7b30e9cfca",
   "metadata": {},
   "outputs": [],
   "source": [
    "from mendeleev.db import get_session, get_engine\n",
    "from mendeleev.models import Isotope"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d77853e3-a184-4ad7-8363-0fe3e0ff1918",
   "metadata": {},
   "outputs": [],
   "source": [
    "# open a database session with read_only=False so the updates below can be\n",
    "# written and committed\n",
    "session = get_session(read_only=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "532770e6-ac25-4cdd-a34e-494932ad62e3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the parsed values into the matching Isotope rows.\n",
    "# All updates are committed together so a failure part-way through leaves\n",
    "# the database untouched; the session is always closed afterwards.\n",
    "fields = {\"spin\", \"parity\", \"quadrupole_moment\", \"quadrupole_moment_uncertainty\"}\n",
    "try:\n",
    "    for row in qmom.iter_rows(named=True):\n",
    "        # only the columns named in `fields` are written; the isotope is\n",
    "        # identified by its atomic number and mass number\n",
    "        (\n",
    "            session.query(Isotope)\n",
    "            .filter_by(\n",
    "                atomic_number=row[\"atomic_number\"], mass_number=row[\"mass_number\"]\n",
    "            )\n",
    "            .update({k: v for k, v in row.items() if k in fields})\n",
    "        )\n",
    "    session.commit()\n",
    "except Exception:\n",
    "    # discard every pending change before re-raising the error\n",
    "    session.rollback()\n",
    "    raise\n",
    "finally:\n",
    "    session.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "53231336-b3f2-415e-8bcc-6311ba2b6bb7",
   "metadata": {},
   "source": [
    "## Validate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7e46e9a2-60d0-4463-8b95-ea68b2c539b4",
   "metadata": {},
   "outputs": [],
   "source": [
    "from mendeleev import H"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "30a89787-c07d-4142-98a7-b4477727576e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# spot check: read back the quadrupole moment stored for the hydrogen\n",
    "# isotope selected by H.isotope(2)\n",
    "H.isotope(2).quadrupole_moment"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
